Prep

#' DEAprep
#'
#' Generates control samples out of the average values over all supplied samples, combines spliced and unspliced
#' assays and calculates their logCPM & log2FC data. 
#' All is assembled into one SE object. 
#' 
#'
#' @param se SummarizedExperiment object containing assays of raw counts of spliced & unspliced tx
#'
#' @return an SE object of combined spliced and unspliced data as well as control samples generated from averaging over all treatment samples
#' 
DEAprep <- function(se, control){
  
  # allocation
  
  readtype <- list(spliced="spliced",unspliced="unspliced")
  
  ## generate DGEList object
  dds1 <- DGEList(assays(se)$spliced)
  dds2 <- DGEList(assays(se)$unspliced)
  
  ## combine the spliced & unspliced assays
  dds <- cbind(dds1, dds2)

  
  # generate colData

  dd1 <- colData(se)[,c("rep","miRNA")]
  dd1$readtype <- readtype$spliced
  
  ## duplicate dd to have data for combined spliced & unspliced assay
  dd <- rbind(dd1, dd1)
  dd$readtype[(nrow(dd1)+1):nrow(dd)] <- readtype$unspliced
  
  ## rename both dds & dd object features
  n.cols <- lapply(readtype, function(x) sapply(colnames(se), function(y) paste(y, x, sep=".") ))
  
  colnames(dds) <- c(n.cols$spliced, n.cols$unspliced)
  rownames(dd) <- c(n.cols$spliced, n.cols$unspliced)

  
  # rebuild SE object
  
  ## SE object with logCPM & logFC assays
  se <- SummarizedExperiment(assays=list(counts=dds$counts), 
                             rowData=rowData(se), 
                             colData=dd) 
  assays(se)$logcpm <- log1p(cpm(calcNormFactors(DGEList(assay(se)))))
  se <- SEtools::log2FC(se, controls = se$miRNA==control, by = se$readtype, fromAssay = "logcpm")
  
  return(se)
}

PCA

DEA

#' exonDEA
#'
#' @param se SE object containing assays with combined spliced & unspliced counts
#' @param model function: model matrix function to test
#' @param model0 function: model matrix function to be tested against
#' @param control string: name of control samples inside colData() of supplied SE object
#'
#' @return SE object containing DEA results over all treatments & over individual ones
#' 
exonDEA <- function(se, model, model0=~1, control){
  
  ## allocation
  se$miRNA <- relevel(droplevels(se$miRNA), ref=control)
  se$readtype <- relevel(factor(se$readtype), ref="unspliced")
  se$rep <- droplevels(se$rep)
  dd <- as.data.frame(colData(se))
  ## normalization
  dds <- calcNormFactors(DGEList(assays(se)$counts))
  ## models
  mm <- model.matrix(model, data=dd)
  mm0 <- model.matrix(model0, data=dd)
  testCoeff <- setdiff(colnames(mm), colnames(mm0))
  ## estimate dispersion
  dds <- estimateDisp(dds,mm)
  ## fit negative binomial distribution on counts per gene (use glmFit for few replicates)
  fit <- glmFit(dds, mm)
  ## fit a GLM on the data
  lrt.comb <- glmLRT(fit, testCoeff)
  ## top genes that change relative to stage 0
  res.comb <- as.data.frame(topTags(lrt.comb, Inf))
  ## fit linear model dropping one sample at a time (using multiple cores)
  res.list <- bplapply( testCoeff, BPPARAM=MulticoreParam(8), FUN=function(x){
    as.data.frame(topTags(glmLRT(fit, x),Inf))
  })
  dea.names <- gsub("readtype","", testCoeff)
  dea.names <- make.names(gsub(":miRNA",".", dea.names))
  colnames(res.comb)[grepl("logFC",colnames(res.comb))] <- paste0("logFC.", dea.names)
  names(res.list) <- paste0("DEA.",dea.names)
  
  ## add DEAs
  rowData(se)$DEA.spliced.all <- DataFrame(res.comb[rownames(se),])
  for(i in paste0("DEA.",dea.names)){
    rowData(se)[[i]] <- DataFrame(res.list[[i]][rownames(se),])
  }

  return(se)
}
## DEA.spliced.all DEA.spliced.DKO 
##              90              90
## DEA.spliced.all DEA.spliced.DKO 
##             277             277

Stats

## [1] 0.1177802

up-/downregulations at different significance levels